In [1]:
# !pip install git+https://github.com/alberanid/imdbpy
# !pip install pandas
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas_profiling --upgrade
# !pip install plotly
# !pip install wordcloud
# !pip install Flask
In [2]:
# Import Dataset
# Import File from Local Drive
# from google.colab import files
# data_to_load = files.upload()
# from google.colab import drive
# drive.mount('/content/drive')
In [3]:
import collections
import os
import re
import warnings

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from pandas_profiling import ProfileReport
from plotly.offline import iplot, init_notebook_mode
from plotly.subplots import make_subplots
from wordcloud import WordCloud, STOPWORDS
%matplotlib inline
warnings.filterwarnings("ignore")
In [4]:
# Fetches the complete NLTK data collection ('all'); the log below lists every package checked.
# NOTE(review): the import cell only pulls in stopwords and word_tokenize from NLTK data —
# downloading just the packages actually used would likely shorten a fresh run; confirm
# against the later cells before narrowing this.
nltk.download('all')
[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package biocreative_ppi is already up-to-date!
[nltk_data]    | Downloading package brown to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown is already up-to-date!
[nltk_data]    | Downloading package brown_tei to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown_tei is already up-to-date!
[nltk_data]    | Downloading package cess_cat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_cat is already up-to-date!
[nltk_data]    | Downloading package cess_esp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_esp is already up-to-date!
[nltk_data]    | Downloading package chat80 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package chat80 is already up-to-date!
[nltk_data]    | Downloading package city_database to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package city_database is already up-to-date!
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package comparative_sentences to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comparative_sentences is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package comtrans to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comtrans is already up-to-date!
[nltk_data]    | Downloading package conll2000 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2000 is already up-to-date!
[nltk_data]    | Downloading package conll2002 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2002 is already up-to-date!
[nltk_data]    | Downloading package conll2007 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2007 is already up-to-date!
[nltk_data]    | Downloading package crubadan to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package crubadan is already up-to-date!
[nltk_data]    | Downloading package dependency_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dependency_treebank is already up-to-date!
[nltk_data]    | Downloading package dolch to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dolch is already up-to-date!
[nltk_data]    | Downloading package europarl_raw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package europarl_raw is already up-to-date!
[nltk_data]    | Downloading package floresta to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package floresta is already up-to-date!
[nltk_data]    | Downloading package framenet_v15 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v15 is already up-to-date!
[nltk_data]    | Downloading package framenet_v17 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v17 is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package ieer to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ieer is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package indian to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package indian is already up-to-date!
[nltk_data]    | Downloading package jeita to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package jeita is already up-to-date!
[nltk_data]    | Downloading package kimmo to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package kimmo is already up-to-date!
[nltk_data]    | Downloading package knbc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package knbc is already up-to-date!
[nltk_data]    | Downloading package lin_thesaurus to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package lin_thesaurus is already up-to-date!
[nltk_data]    | Downloading package mac_morpho to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mac_morpho is already up-to-date!
[nltk_data]    | Downloading package machado to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package machado is already up-to-date!
[nltk_data]    | Downloading package masc_tagged to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package masc_tagged is already up-to-date!
[nltk_data]    | Downloading package moses_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package moses_sample is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package movie_reviews is already up-to-date!
[nltk_data]    | Downloading package names to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package names is already up-to-date!
[nltk_data]    | Downloading package nombank.1.0 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nombank.1.0 is already up-to-date!
[nltk_data]    | Downloading package nps_chat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nps_chat is already up-to-date!
[nltk_data]    | Downloading package omw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package omw is already up-to-date!
[nltk_data]    | Downloading package opinion_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package opinion_lexicon is already up-to-date!
[nltk_data]    | Downloading package paradigms to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package paradigms is already up-to-date!
[nltk_data]    | Downloading package pil to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pil is already up-to-date!
[nltk_data]    | Downloading package pl196x to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pl196x is already up-to-date!
[nltk_data]    | Downloading package ppattach to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ppattach is already up-to-date!
[nltk_data]    | Downloading package problem_reports to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package problem_reports is already up-to-date!
[nltk_data]    | Downloading package propbank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package propbank is already up-to-date!
[nltk_data]    | Downloading package ptb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ptb is already up-to-date!
[nltk_data]    | Downloading package product_reviews_1 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_1 is already up-to-date!
[nltk_data]    | Downloading package product_reviews_2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_2 is already up-to-date!
[nltk_data]    | Downloading package pros_cons to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pros_cons is already up-to-date!
[nltk_data]    | Downloading package qc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package qc is already up-to-date!
[nltk_data]    | Downloading package reuters to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package reuters is already up-to-date!
[nltk_data]    | Downloading package rte to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rte is already up-to-date!
[nltk_data]    | Downloading package semcor to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package semcor is already up-to-date!
[nltk_data]    | Downloading package senseval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package senseval is already up-to-date!
[nltk_data]    | Downloading package sentiwordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentiwordnet is already up-to-date!
[nltk_data]    | Downloading package sentence_polarity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentence_polarity is already up-to-date!
[nltk_data]    | Downloading package shakespeare to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package shakespeare is already up-to-date!
[nltk_data]    | Downloading package sinica_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sinica_treebank is already up-to-date!
[nltk_data]    | Downloading package smultron to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package smultron is already up-to-date!
[nltk_data]    | Downloading package state_union to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package state_union is already up-to-date!
[nltk_data]    | Downloading package stopwords to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package stopwords is already up-to-date!
[nltk_data]    | Downloading package subjectivity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package subjectivity is already up-to-date!
[nltk_data]    | Downloading package swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package swadesh is already up-to-date!
[nltk_data]    | Downloading package switchboard to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package switchboard is already up-to-date!
[nltk_data]    | Downloading package timit to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package timit is already up-to-date!
[nltk_data]    | Downloading package toolbox to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package toolbox is already up-to-date!
[nltk_data]    | Downloading package treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package treebank is already up-to-date!
[nltk_data]    | Downloading package twitter_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package twitter_samples is already up-to-date!
[nltk_data]    | Downloading package udhr to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr is already up-to-date!
[nltk_data]    | Downloading package udhr2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr2 is already up-to-date!
[nltk_data]    | Downloading package unicode_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package unicode_samples is already up-to-date!
[nltk_data]    | Downloading package universal_treebanks_v20 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_treebanks_v20 is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package verbnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet is already up-to-date!
[nltk_data]    | Downloading package verbnet3 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet3 is already up-to-date!
[nltk_data]    | Downloading package webtext to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package webtext is already up-to-date!
[nltk_data]    | Downloading package wordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet is already up-to-date!
[nltk_data]    | Downloading package wordnet_ic to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet_ic is already up-to-date!
[nltk_data]    | Downloading package words to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package words is already up-to-date!
[nltk_data]    | Downloading package ycoe to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ycoe is already up-to-date!
[nltk_data]    | Downloading package rslp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rslp is already up-to-date!
[nltk_data]    | Downloading package maxent_treebank_pos_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_treebank_pos_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package universal_tagset to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_tagset is already up-to-date!
[nltk_data]    | Downloading package maxent_ne_chunker to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_ne_chunker is already up-to-date!
[nltk_data]    | Downloading package punkt to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package punkt is already up-to-date!
[nltk_data]    | Downloading package book_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package book_grammars is already up-to-date!
[nltk_data]    | Downloading package sample_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sample_grammars is already up-to-date!
[nltk_data]    | Downloading package spanish_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package spanish_grammars is already up-to-date!
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package basque_grammars is already up-to-date!
[nltk_data]    | Downloading package large_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package large_grammars is already up-to-date!
[nltk_data]    | Downloading package tagsets to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package tagsets is already up-to-date!
[nltk_data]    | Downloading package snowball_data to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package snowball_data is already up-to-date!
[nltk_data]    | Downloading package bllip_wsj_no_aux to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package bllip_wsj_no_aux is already up-to-date!
[nltk_data]    | Downloading package word2vec_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package word2vec_sample is already up-to-date!
[nltk_data]    | Downloading package panlex_swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package panlex_swadesh is already up-to-date!
[nltk_data]    | Downloading package mte_teip5 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mte_teip5 is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package perluniprops to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package perluniprops is already up-to-date!
[nltk_data]    | Downloading package nonbreaking_prefixes to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nonbreaking_prefixes is already up-to-date!
[nltk_data]    | Downloading package vader_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package vader_lexicon is already up-to-date!
[nltk_data]    | Downloading package porter_test to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package porter_test is already up-to-date!
[nltk_data]    | Downloading package wmt15_eval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wmt15_eval is already up-to-date!
[nltk_data]    | Downloading package mwa_ppdb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mwa_ppdb is already up-to-date!
[nltk_data]    | 
[nltk_data]  Done downloading collection all
Out[4]:
True
In [5]:
# Directory that holds the OTT CSV files.
# Set the OTT_DATA_DIR environment variable to point at your own copy of the data;
# when it is unset, the original author's local folder is used as before.
# (Google Colab alternative: '/content/drive/MyDrive/Files/')
# os.path.join(..., '') guarantees a trailing separator so `path + <file name>` keeps working
# even when the override is given without one.
path = os.path.join(
    os.environ.get('OTT_DATA_DIR', 'C:\\Users\\pawan\\OneDrive\\Desktop\\ott\\Data\\'),
    '',
)

# Raw movie catalogue.
df_movies = pd.read_csv(path + 'ottmovies.csv')

df_movies.head()
Out[5]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type
0 1 Inception 2010 13+ 8.8 87% Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148.0 movie NaN 1 0 0 0 0
1 2 The Matrix 1999 16+ 8.7 88% Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136.0 movie NaN 1 0 0 0 0
2 3 Avengers: Infinity War 2018 13+ 8.4 85% Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149.0 movie NaN 1 0 0 0 0
3 4 Back to the Future 1985 7+ 8.5 96% Robert Zemeckis Michael J. Fox,Christopher Lloyd,Lea Thompson,... Adventure,Comedy,Sci-Fi United States English Marty McFly, a typical American teenager of th... 116.0 movie NaN 1 0 0 0 0
4 5 The Good, the Bad and the Ugly 1966 16+ 8.8 97% Sergio Leone Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... Western Italy,Spain,West Germany,United States Italian Blondie (The Good) (Clint Eastwood) is a profe... 161.0 movie NaN 1 0 1 0 0
In [6]:
# profile = ProfileReport(df_movies)
# profile
In [7]:
def data_investigate(df):
    """Print a quick structural overview of ``df`` and plot where its values are missing.

    The report, printed section by section, contains the row and column counts,
    the column names, the column dtypes, the per-column count of missing values
    (only columns with at least one), and the per-column percentage of missing
    values (all columns). It ends with a seaborn heatmap of the null mask.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect; it is only read.

    Returns
    -------
    None
        Everything is printed or drawn.
    """
    divider = '**'*25
    n_rows, n_cols = df.shape

    # The null mask feeds the counts, the percentages and the heatmap.
    null_mask = df.isnull()
    null_counts = null_mask.sum()

    print('No of Rows : ', n_rows)
    print('No of Coloums : ', n_cols)
    print(divider)

    print('Colums Names : \n', df.columns)
    print(divider)

    print('Datatype of Columns : \n', df.dtypes)
    print(divider)

    print('Missing Values : ')
    print(null_counts[null_counts > 0])
    print(divider)

    print('Missing vaules %age wise :\n')
    print(100*(null_counts/len(df.index)))
    print(divider)

    print('Pictorial Representation : ')
    plt.figure(figsize = (10, 10))
    sns.heatmap(null_mask, yticklabels = False, cbar = False)
    plt.show()
In [8]:
data_investigate(df_movies)
No of Rows :  16923
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb               float64
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime            float64
Kind                object
Seasons            float64
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
dtype: object
**************************************************
Missing Values : 
Age                 8457
IMDb                 328
Rotten Tomatoes    10437
Directors            357
Cast                 648
Genres               234
Country              303
Language             437
Plotline            4958
Runtime              382
Seasons            16923
dtype: int64
**************************************************
Missing vaules %age wise :

ID                   0.000000
Title                0.000000
Year                 0.000000
Age                 49.973409
IMDb                 1.938191
Rotten Tomatoes     61.673462
Directors            2.109555
Cast                 3.829108
Genres               1.382734
Country              1.790463
Language             2.582284
Plotline            29.297406
Runtime              2.257283
Kind                 0.000000
Seasons            100.000000
Netflix              0.000000
Hulu                 0.000000
Prime Video          0.000000
Disney+              0.000000
Type                 0.000000
dtype: float64
**************************************************
Pictorial Representation : 
In [9]:
# Clean df_movies: normalise age ratings, convert Rotten Tomatoes scores, fill the
# remaining gaps with sentinels, drop the empty Seasons column and derive a
# 'Service Provider' label. The ID, Kind and Type columns are kept as loaded.

# --- Age: Disney+ default ---------------------------------------------------
# Disney+ titles without an age rating are assigned '13'.
# The comparison is parenthesised on purpose: `&` binds tighter than `==`, so the
# unparenthesised form evaluates `(isnull & flag) == 1`, which is only correct while
# the platform flag is strictly 0/1.
missing_age_on_disney = df_movies['Age'].isnull() & (df_movies['Disney+'] == 1)
df_movies.loc[missing_age_on_disney, 'Age'] = '13'

# --- Rotten Tomatoes ---------------------------------------------------------
# Strip the '%' sign and convert the scores that exist. Rows without a score are not
# part of the converted series, so index alignment leaves them as NaN; they receive
# the sentinel in the fill step below.
rotten_tomatoes = df_movies['Rotten Tomatoes']
df_movies['Rotten Tomatoes'] = (
    rotten_tomatoes[rotten_tomatoes.notnull()]
    .str.replace('%', '', regex = False)
    .astype(int)
)

# --- Missing values ----------------------------------------------------------
# One pass instead of one in-place call per column. Missing ratings become 'NR';
# every other gap becomes the string 'NA'.
# NOTE: putting 'NA' into IMDb, Rotten Tomatoes and Runtime turns those columns into
# object dtype (see the dtypes printed by the next data_investigate call), so they
# drop out of describe()/corr(). Mean/median imputation was considered as an
# alternative. The sentinel is kept because a later cell filters Language on
# == "NA"; presumably other columns are handled the same way — verify before changing.
df_movies = df_movies.fillna({
    'Age': 'NR',
    'IMDb': "NA",
    'Rotten Tomatoes': "NA",
    'Directors': "NA",
    'Cast': "NA",
    'Genres': "NA",
    'Country': "NA",
    'Language': "NA",
    'Plotline': "NA",
    'Runtime': "NA",
})

# --- Age: label normalisation --------------------------------------------------
# Assign the result back instead of calling replace(..., inplace=True) on
# df_movies['Age']: that chained in-place form warns on pandas 2.x and no longer
# updates the frame once Copy-on-Write is enabled.
df_movies['Age'] = df_movies['Age'].replace({
    'all': '0',
    '7+': '7',
    '13+': '13',
    '16+': '16',
    '18+': '18',
})

# --- Seasons -------------------------------------------------------------------
# Entirely empty for this movie dataset (100% missing in the report above).
df_movies = df_movies.drop(['Seasons'], axis = 1)

# --- Service Provider ------------------------------------------------------------
# idxmax returns the first column holding the row maximum, so a title available on
# several platforms is labelled with only the first match in the order
# Netflix, Prime Video, Disney+, Hulu.
df_movies['Service Provider'] = df_movies[['Netflix', 'Prime Video', 'Disney+', 'Hulu']].idxmax(axis = 1)

# --- Removing Duplicate and Missing Entries ------------------------------------
df_movies = df_movies.dropna(how = 'any').drop_duplicates()
In [10]:
data_investigate(df_movies)
No of Rows :  16923
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type',
       'Service Provider'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb                object
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime             object
Kind                object
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
Service Provider    object
dtype: object
**************************************************
Missing Values : 
Series([], dtype: int64)
**************************************************
Missing vaules %age wise :

ID                  0.0
Title               0.0
Year                0.0
Age                 0.0
IMDb                0.0
Rotten Tomatoes     0.0
Directors           0.0
Cast                0.0
Genres              0.0
Country             0.0
Language            0.0
Plotline            0.0
Runtime             0.0
Kind                0.0
Netflix             0.0
Hulu                0.0
Prime Video         0.0
Disney+             0.0
Type                0.0
Service Provider    0.0
dtype: float64
**************************************************
Pictorial Representation : 
In [11]:
df_movies.head()
Out[11]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 1 Inception 2010 13 8.8 87 Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix
1 2 The Matrix 1999 16 8.7 88 Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136 movie 1 0 0 0 0 Netflix
2 3 Avengers: Infinity War 2018 13 8.4 85 Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149 movie 1 0 0 0 0 Netflix
3 4 Back to the Future 1985 7 8.5 96 Robert Zemeckis Michael J. Fox,Christopher Lloyd,Lea Thompson,... Adventure,Comedy,Sci-Fi United States English Marty McFly, a typical American teenager of th... 116 movie 1 0 0 0 0 Netflix
4 5 The Good, the Bad and the Ugly 1966 16 8.8 97 Sergio Leone Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... Western Italy,Spain,West Germany,United States Italian Blondie (The Good) (Clint Eastwood) is a profe... 161 movie 1 0 1 0 0 Netflix
In [12]:
df_movies.describe()
Out[12]:
ID Year Netflix Hulu Prime Video Disney+ Type
count 16923.000000 16923.000000 16923.000000 16923.000000 16923.000000 16923.000000 16923.0
mean 8462.000000 2003.211901 0.214915 0.062637 0.727235 0.033150 0.0
std 4885.393638 20.526532 0.410775 0.242315 0.445394 0.179034 0.0
min 1.000000 1901.000000 0.000000 0.000000 0.000000 0.000000 0.0
25% 4231.500000 2001.000000 0.000000 0.000000 0.000000 0.000000 0.0
50% 8462.000000 2012.000000 0.000000 0.000000 1.000000 0.000000 0.0
75% 12692.500000 2016.000000 0.000000 0.000000 1.000000 0.000000 0.0
max 16923.000000 2020.000000 1.000000 1.000000 1.000000 1.000000 0.0
In [13]:
# Correlate the numeric columns only. After cleaning, df_movies also holds text
# columns; pandas >= 2.0 raises on those instead of silently skipping them, so select
# the numeric columns explicitly (same matrix on older versions).
df_movies.select_dtypes(include = 'number').corr()
Out[13]:
ID Year Netflix Hulu Prime Video Disney+ Type
ID 1.000000 -0.217816 -0.644470 -0.129926 0.469301 0.263530 NaN
Year -0.217816 1.000000 0.256151 0.101337 -0.255578 -0.047258 NaN
Netflix -0.644470 0.256151 1.000000 -0.118032 -0.745141 -0.089649 NaN
Hulu -0.129926 0.101337 -0.118032 1.000000 -0.284654 -0.039693 NaN
Prime Video 0.469301 -0.255578 -0.745141 -0.284654 1.000000 -0.289008 NaN
Disney+ 0.263530 -0.047258 -0.089649 -0.039693 -0.289008 1.000000 NaN
Type NaN NaN NaN NaN NaN NaN NaN
In [14]:
# df_movies.sort_values('Year', ascending = True)
# df_movies.sort_values('IMDb', ascending = False)
In [15]:
# df_movies.to_csv(path_or_buf= '/content/drive/MyDrive/Files/updated_ottmovies.csv', index = False)
 
# path = '/content/drive/MyDrive/Files/'
 
# udf_movies = pd.read_csv(path + 'updated_ottmovies.csv')
 
# udf_movies
In [16]:
# df_netflix_movies = df_movies.loc[(df_movies['Netflix'] > 0)]
# df_hulu_movies = df_movies.loc[(df_movies['Hulu'] > 0)]
# df_prime_video_movies = df_movies.loc[(df_movies['Prime Video'] > 0)]
# df_disney_movies = df_movies.loc[(df_movies['Disney+'] > 0)]
In [17]:
# Titles exclusive to one platform: the flag of that platform is 1 and the
# flags of the three other platforms are all 0.
netflix_flag = df_movies['Netflix']
hulu_flag = df_movies['Hulu']
prime_video_flag = df_movies['Prime Video']
disney_flag = df_movies['Disney+']

df_netflix_only_movies = df_movies.loc[
    netflix_flag.eq(1) & hulu_flag.eq(0) & prime_video_flag.eq(0) & disney_flag.eq(0)
]
df_hulu_only_movies = df_movies.loc[
    netflix_flag.eq(0) & hulu_flag.eq(1) & prime_video_flag.eq(0) & disney_flag.eq(0)
]
df_prime_video_only_movies = df_movies.loc[
    netflix_flag.eq(0) & hulu_flag.eq(0) & prime_video_flag.eq(1) & disney_flag.eq(0)
]
df_disney_only_movies = df_movies.loc[
    netflix_flag.eq(0) & hulu_flag.eq(0) & prime_video_flag.eq(0) & disney_flag.eq(1)
]
In [18]:
# Work on a copy so the language clean-up below does not modify df_movies.
df_movies_languages = df_movies.copy()
In [19]:
# Remove, in place, every row whose Language is the "NA" placeholder.
rows_without_language = df_movies_languages.index[df_movies_languages['Language'] == "NA"]
df_movies_languages.drop(index = rows_without_language, inplace = True)
# df_movies_languages = df_movies_languages[df_movies_languages.Language != "NA"]
# df_movies_languages['Language'] = df_movies_languages['Language'].astype(str)
In [20]:
# Copy of the cleaned frame; the 'Number of Languages' column is added to this one.
df_movies_count_languages = df_movies_languages.copy()
In [21]:
# Second copy of the cleaned frame; this one is later expanded to one row per language.
df_movies_language = df_movies_languages.copy()
In [22]:
# Map each distinct Language string to the number of comma-separated
# languages it lists; the "NA" placeholder counts as zero languages.
languages = {
    entry: (len(entry.split(',')) if entry != "NA" else 0)
    for entry in df_movies_count_languages['Language'].dropna().unique()
}

# Add the per-movie count as a new column. Rows with a missing Language are
# not in the dict, so map() yields NaN for them; those are filled with 0
# before the integer cast, which would otherwise raise.
df_movies_count_languages['Number of Languages'] = (
    df_movies_count_languages['Language'].map(languages).fillna(0).astype(int)
)
In [23]:
# Separate copy of the frame that now carries the 'Number of Languages' column.
df_movies_mixed_languages = df_movies_count_languages.copy()
In [24]:
# One frame per streaming service, holding the titles that service carries.
# A title available on several services appears in each of their frames.
netflix_languages_movies = df_movies_count_languages[df_movies_count_languages['Netflix'].eq(1)]
hulu_languages_movies = df_movies_count_languages[df_movies_count_languages['Hulu'].eq(1)]
prime_video_languages_movies = df_movies_count_languages[df_movies_count_languages['Prime Video'].eq(1)]
disney_languages_movies = df_movies_count_languages[df_movies_count_languages['Disney+'].eq(1)]
In [25]:
# Correlation matrix of the numeric columns only (text columns cannot be
# correlated and make corr() raise on pandas >= 2.0).
corr = df_movies_count_languages.select_dtypes(include = 'number').corr()
# One figure/axes pair; an additional plt.figure() call would only leave an
# empty extra figure in the cell output.
fig, ax = plt.subplots(figsize = (10, 8))
# Heat map with the coefficients annotated to two decimals. seaborn takes the
# tick labels from corr's index/columns and centres them on the cells, so no
# manual xticks/yticks (which would sit on the cell edges) are needed.
sns.heatmap(corr, cmap = 'magma', annot = True, fmt = ".2f", ax = ax)
# Show the plot
plt.show()
<Figure size 720x720 with 0 Axes>
In [26]:
# Rank all movies by their language count, highest first, and renumber the rows from 0.
df_languages_most_movies = (
    df_movies_count_languages
    .sort_values(by = 'Number of Languages', ascending = False)
    .reset_index(drop = True)
)

print('\nMovies with Highest Ever Number of Languages are : \n')
df_languages_most_movies.head(5)
Movies with Highest Ever Number of Languages are : 

Out[26]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Languages
0 15782 The Jungle Book 1967 7 7.4 94 Jon Favreau Neel Sethi,Bill Murray,Ben Kingsley,Idris Elba... Adventure,Drama,Family,Fantasy United Kingdom,United States ... While the First Order continues to ravage the ... 106 movie 0 0 0 1 0 Disney+ 16
1 456 2012 2009 13 5.8 39 Roland Emmerich John Cusack,Amanda Peet,Chiwetel Ejiofor,Thand... Action,Adventure,Sci-Fi United States ... Dr. Adrian Helmsley, part of a worldwide geoph... 158 movie 1 0 0 0 0 Netflix 10
2 3940 Free Willy 2: The Adventure Home 1995 7 5.1 50 Dwight H. Little Jason James Richter,Francis Capra,Mary Kate Sc... Adventure,Drama,Family France,United States,Luxembourg ... Willy the smart and rebellious whale and Jessi... 95 movie 0 1 0 0 0 Hulu 8
3 555 American Assassin 2017 16 6.2 34 Michael Cuesta Dylan O'Brien,Charlotte Vega,Christopher Bomfo... Action,Thriller United States ... Twenty three-year-old Mitch lost his parents t... 111 movie 1 0 0 0 0 Netflix 8
4 449 Die Another Day 2002 13 6.1 56 Lee Tamahori Pierce Brosnan,Halle Berry,Toby Stephens,Rosam... Action,Adventure,Thriller United Kingdom,United States,Spain,Iceland ... Pierce Brosnan gives one last mission as James... 133 movie 1 0 0 0 0 Netflix 8

5 rows × 21 columns

In [27]:
# Bar chart of the first 15 rows of the descending ranking: titles on the
# y-axis, language count on the x-axis and as the bar colour.
fig = px.bar(
    title = 'Movies with Highest Number of Languages : All Platforms',
    x = df_languages_most_movies['Number of Languages'].head(15),
    y = df_languages_most_movies['Title'].head(15),
    color = df_languages_most_movies['Number of Languages'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Movies', 'x' : 'Number of Languages'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [28]:
# Rank all movies by their language count, lowest first, and renumber the rows from 0.
df_languages_least_movies = (
    df_movies_count_languages
    .sort_values(by = 'Number of Languages', ascending = True)
    .reset_index(drop = True)
)

print('\nMovies with Lowest Ever Number of Languages are : \n')
df_languages_least_movies.head(5)
Movies with Lowest Ever Number of Languages are : 

Out[28]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Languages
0 8405 Parasomnia 2008 16 5.2 NA William Malone Sean Young,Patrick Kilpatrick,Dylan McKnight,C... Horror,Thriller United States ... Autopsy examines how forensic examiners can he... 103 movie 0 0 1 0 0 Prime Video 1
1 10849 Road to the Open 2014 7 6.7 NA Cole Claassen Troy McKay,Phillip DeVona,Michelle Gunn,Judd N... Comedy,Drama United States ... Having a child die is the worst thing imaginab... 90 movie 0 0 1 0 0 Prime Video 1
2 10850 Blinder 2013 NR 4.5 0 Richard Gray Oliver Ackland,Jack Thompson,Zoe Carides,Anna ... Drama,Romance,Sport Australia ... Bernard and Claire Boiko won enough money on T... 102 movie 0 0 1 0 0 Prime Video 1
3 10851 The Last Hit Man 2008 NR 5.3 NA Christopher Warre Smets Joe Mantegna,Elizabeth Whitmere,Romano Orzari,... Action,Crime,Drama,Thriller Canada ... AND THEN THERE WAS LIGHT tells the story of Ba... 90 movie 0 0 1 0 0 Prime Video 1
4 10852 The Kung Fu Instructor 1979 16 6.4 NA Chung Sun Lung Ti,Yue Wong,Feng Ku,Angie Chiu,Lung-Wei W... Action,Drama Hong Kong ... NA 100 movie 0 0 1 0 0 Prime Video 1

5 rows × 21 columns

In [29]:
# Bar chart of the first 15 rows of the ascending ranking: titles on the
# y-axis, language count on the x-axis and as the bar colour.
fig = px.bar(
    title = 'Movies with Lowest Number of Languages : All Platforms',
    x = df_languages_least_movies['Number of Languages'].head(15),
    y = df_languages_least_movies['Title'].head(15),
    color = df_languages_least_movies['Number of Languages'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Movies', 'x' : 'Number of Languages'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [30]:
# Collect the figures first, then print them with the report template.
number_of_languages_col = df_movies_count_languages['Number of Languages']
distinct_counts_desc = number_of_languages_col.sort_values(ascending = False).unique()
distinct_counts_total = number_of_languages_col.unique().shape[0]

# Row 0 of each ranking is the first title after sorting; ties are not listed.
most_languages_title = df_languages_most_movies['Title'].loc[0]
most_languages_value = df_languages_most_movies['Number of Languages'].max()
least_languages_title = df_languages_least_movies['Title'].loc[0]
least_languages_value = df_languages_least_movies['Number of Languages'].min()

print(f'''
      Total '{distinct_counts_total}' unique Number of Languages s were Given, They were Like this,\n
      
      {distinct_counts_desc}\n
 
      The Highest Number of Languages Ever Any Movie Got is '{most_languages_title}' : '{most_languages_value}'\n
 
      The Lowest Number of Languages Ever Any Movie Got is '{least_languages_title}' : '{least_languages_value}'\n
      ''')
      Total '10' unique Number of Languages s were Given, They were Like this,

      
      [16 10  8  7  6  5  4  3  2  1]

 
      The Highest Number of Languages Ever Any Movie Got is 'The Jungle Book' : '16'

 
      The Lowest Number of Languages Ever Any Movie Got is 'Parasomnia' : '1'

      
In [31]:
# Netflix rows of both rankings; the order of the source ranking is kept and
# the rows are renumbered from 0.
netflix_languages_most_movies = (
    df_languages_most_movies[df_languages_most_movies['Netflix'].eq(1)]
    .reset_index(drop = True)
)

netflix_languages_least_movies = (
    df_languages_least_movies[df_languages_least_movies['Netflix'].eq(1)]
    .reset_index(drop = True)
)

netflix_languages_most_movies.head(5)
Out[31]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Languages
0 456 2012 2009 13 5.8 39 Roland Emmerich John Cusack,Amanda Peet,Chiwetel Ejiofor,Thand... Action,Adventure,Sci-Fi United States ... Dr. Adrian Helmsley, part of a worldwide geoph... 158 movie 1 0 0 0 0 Netflix 10
1 555 American Assassin 2017 16 6.2 34 Michael Cuesta Dylan O'Brien,Charlotte Vega,Christopher Bomfo... Action,Thriller United States ... Twenty three-year-old Mitch lost his parents t... 111 movie 1 0 0 0 0 Netflix 8
2 449 Die Another Day 2002 13 6.1 56 Lee Tamahori Pierce Brosnan,Halle Berry,Toby Stephens,Rosam... Action,Adventure,Thriller United Kingdom,United States,Spain,Iceland ... Pierce Brosnan gives one last mission as James... 133 movie 1 0 0 0 0 Netflix 8
3 511 The Wandering Earth 2019 13 6 70 Frant Gwo Jing Wu,Chuxiao Qu,Guangjie Li,Man-Tat Ng,Jin ... Action,Sci-Fi China ... The sun is dying out. The earth will soon be e... 125 movie 1 0 0 0 0 Netflix 8
4 144 Babel 2006 16 7.4 69 Alejandro G. Iñárritu Brad Pitt,Cate Blanchett,Mohamed Akhzam,Peter ... Drama United States,Mexico,France,Morocco,Japan ... 4 interlocking stories connected by a single g... 143 movie 1 0 0 0 0 Netflix 8

5 rows × 21 columns

In [32]:
# Bar chart of the first 15 Netflix rows of the descending ranking.
fig = px.bar(
    title = 'Movies with Highest Number of Languages : Netflix',
    x = netflix_languages_most_movies['Number of Languages'].head(15),
    y = netflix_languages_most_movies['Title'].head(15),
    color = netflix_languages_most_movies['Number of Languages'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Movies', 'x' : 'Number of Languages'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [33]:
# Bar chart of the first 15 Netflix rows of the ascending ranking.
fig = px.bar(
    title = 'Movies with Lowest Number of Languages : Netflix',
    x = netflix_languages_least_movies['Number of Languages'].head(15),
    y = netflix_languages_least_movies['Title'].head(15),
    color = netflix_languages_least_movies['Number of Languages'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Movies', 'x' : 'Number of Languages'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [34]:
# Hulu rows of both rankings; the order of the source ranking is kept and
# the rows are renumbered from 0.
hulu_languages_most_movies = (
    df_languages_most_movies[df_languages_most_movies['Hulu'].eq(1)]
    .reset_index(drop = True)
)

hulu_languages_least_movies = (
    df_languages_least_movies[df_languages_least_movies['Hulu'].eq(1)]
    .reset_index(drop = True)
)

hulu_languages_most_movies.head(5)
Out[34]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Languages
0 3940 Free Willy 2: The Adventure Home 1995 7 5.1 50 Dwight H. Little Jason James Richter,Francis Capra,Mary Kate Sc... Adventure,Drama,Family France,United States,Luxembourg ... Willy the smart and rebellious whale and Jessi... 95 movie 0 1 0 0 0 Hulu 8
1 3777 Downsizing 2017 16 5.7 47 Alexander Payne Matt Damon,Christoph Waltz,Hong Chau,Kristen W... Drama,Fantasy,Sci-Fi United States,Norway ... A new world of possibilities awaits, thanks to... 135 movie 0 1 1 0 0 Prime Video 7
2 3523 Embrace of the Serpent 2015 NR 7.9 96 Ciro Guerra Nilbio Torres,Antonio Bolívar,Jan Bijvoet,Brio... Adventure,Biography,Drama Colombia,Venezuela,Argentina ... NA 125 movie 0 1 1 0 0 Prime Video 7
3 16424 The Exorcist 2016 16 8 89 William Friedkin Ellen Burstyn,Max von Sydow,Lee J. Cobb,Kitty ... Horror United States ... NA 122 movie 0 1 0 0 0 Hulu 7
4 3916 Terminal 2018 13 7.4 33 Steven Spielberg Tom Hanks,Catherine Zeta-Jones,Stanley Tucci,C... Comedy,Drama,Romance United States ... Victor Navorski reaches JFK airport from a pol... 128 movie 0 1 0 0 0 Hulu 7

5 rows × 21 columns

In [35]:
# Bar chart of the first 15 Hulu rows of the descending ranking.
fig = px.bar(
    title = 'Movies with Highest Number of Languages : Hulu',
    x = hulu_languages_most_movies['Number of Languages'].head(15),
    y = hulu_languages_most_movies['Title'].head(15),
    color = hulu_languages_most_movies['Number of Languages'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Movies', 'x' : 'Number of Languages'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [36]:
# Bar chart of the first 15 Hulu rows of the ascending ranking.
fig = px.bar(
    title = 'Movies with Lowest Number of Languages : Hulu',
    x = hulu_languages_least_movies['Number of Languages'].head(15),
    y = hulu_languages_least_movies['Title'].head(15),
    color = hulu_languages_least_movies['Number of Languages'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Movies', 'x' : 'Number of Languages'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [37]:
# Prime Video rows of both rankings; the order of the source ranking is kept
# and the rows are renumbered from 0.
prime_video_languages_most_movies = (
    df_languages_most_movies[df_languages_most_movies['Prime Video'].eq(1)]
    .reset_index(drop = True)
)

prime_video_languages_least_movies = (
    df_languages_least_movies[df_languages_least_movies['Prime Video'].eq(1)]
    .reset_index(drop = True)
)

prime_video_languages_most_movies.head(5)
Out[37]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Languages
0 3777 Downsizing 2017 16 5.7 47 Alexander Payne Matt Damon,Christoph Waltz,Hong Chau,Kristen W... Drama,Fantasy,Sci-Fi United States,Norway ... A new world of possibilities awaits, thanks to... 135 movie 0 1 1 0 0 Prime Video 7
1 8500 Crash Test Aglaé 2017 NR 6.6 NA Eric Gravel India Hair,Julie Depardieu,Yolande Moreau,Anne... Comedy,Drama France ... Spetnaz (Special Ops) veteran Nick Cherenko le... 85 movie 0 0 1 0 0 Prime Video 7
2 11459 Off Jackson Avenue 2008 NR 6.7 59 John-Luke Montias Jessica Pimentel,Stivi Paskoski,Jun Suenaga,Jo... Crime,Drama,Thriller United States ... The Attendant tells the story of Alex, a haple... 80 movie 0 0 1 0 0 Prime Video 7
3 4866 The Vikings 1958 16 6.6 76 John McTiernan,Michael Crichton Antonio Banderas,Diane Venora,Dennis Storhøi,V... Action,Adventure,History United States ... NA 102 movie 0 0 1 0 0 Prime Video 7
4 3523 Embrace of the Serpent 2015 NR 7.9 96 Ciro Guerra Nilbio Torres,Antonio Bolívar,Jan Bijvoet,Brio... Adventure,Biography,Drama Colombia,Venezuela,Argentina ... NA 125 movie 0 1 1 0 0 Prime Video 7

5 rows × 21 columns

In [38]:
# Bar chart of the first 15 Prime Video rows of the descending ranking.
fig = px.bar(
    title = 'Movies with Highest Number of Languages : Prime Video',
    x = prime_video_languages_most_movies['Number of Languages'].head(15),
    y = prime_video_languages_most_movies['Title'].head(15),
    color = prime_video_languages_most_movies['Number of Languages'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Movies', 'x' : 'Number of Languages'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [39]:
# Bar chart of the first 15 Prime Video rows of the ascending ranking.
fig = px.bar(
    title = 'Movies with Lowest Number of Languages : Prime Video',
    x = prime_video_languages_least_movies['Number of Languages'].head(15),
    y = prime_video_languages_least_movies['Title'].head(15),
    color = prime_video_languages_least_movies['Number of Languages'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Movies', 'x' : 'Number of Languages'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [40]:
# Disney+ rows of both rankings; the order of the source ranking is kept and
# the rows are renumbered from 0.
disney_languages_most_movies = (
    df_languages_most_movies[df_languages_most_movies['Disney+'].eq(1)]
    .reset_index(drop = True)
)

disney_languages_least_movies = (
    df_languages_least_movies[df_languages_least_movies['Disney+'].eq(1)]
    .reset_index(drop = True)
)

disney_languages_most_movies.head(5)
Out[40]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Languages
0 15782 The Jungle Book 1967 7 7.4 94 Jon Favreau Neel Sethi,Bill Murray,Ben Kingsley,Idris Elba... Adventure,Drama,Family,Fantasy United Kingdom,United States ... While the First Order continues to ravage the ... 106 movie 0 0 0 1 0 Disney+ 16
1 16916 Iron Man 1994 16 7.9 60 Jon Favreau Robert Downey Jr.,Terrence Howard,Jeff Bridges... Action,Adventure,Sci-Fi United States,Canada ... NA 126 movie 0 0 0 1 0 Disney+ 7
2 15746 Captain America: Civil War 2016 13 7.8 90 Anthony Russo,Joe Russo Chris Evans,Robert Downey Jr.,Scarlett Johanss... Action,Adventure,Sci-Fi United States ... All looks lost for the Rebellion against the E... 147 movie 0 0 0 1 0 Disney+ 6
3 15802 Miracle on 34th Street 1947 7 6.6 96 Les Mayfield Richard Attenborough,Elizabeth Perkins,Dylan M... Family,Fantasy United States ... A young boy, Pete, is found in a forest where ... 114 movie 0 0 0 1 0 Disney+ 6
4 16073 The Brave Little Toaster to the Rescue 1997 0 6.4 40 Robert C. Ramirez,Patrick A. Ventura Jessica Tuck,Chris Young,Roger Kabler,Deanna O... Animation,Adventure,Family,Fantasy United States ... Freshman Jamie Bartlett is frustrated with hig... 74 movie 0 0 0 1 0 Disney+ 6

5 rows × 21 columns

In [41]:
# Bar chart of the first 15 Disney+ rows of the descending ranking.
fig = px.bar(
    title = 'Movies with Highest Number of Languages : Disney+',
    x = disney_languages_most_movies['Number of Languages'].head(15),
    y = disney_languages_most_movies['Title'].head(15),
    color = disney_languages_most_movies['Number of Languages'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Movies', 'x' : 'Number of Languages'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [42]:
# Bar chart of the first 15 Disney+ rows of the ascending ranking.
fig = px.bar(
    title = 'Movies with Lowest Number of Languages : Disney+',
    x = disney_languages_least_movies['Number of Languages'].head(15),
    y = disney_languages_least_movies['Title'].head(15),
    color = disney_languages_least_movies['Number of Languages'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Movies', 'x' : 'Number of Languages'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [43]:
# Report, overall and per platform, the title in row 0 of each ranking together
# with the max/min 'Number of Languages' of that ranking.
# NOTE: ['Title'][0] names only the first row after sorting; when several movies
# share the same count (e.g. many titles with 1 language) the others are not listed.
print(f'''
      The Movie with Highest Number of Languages Ever Got is '{df_languages_most_movies['Title'][0]}' : '{df_languages_most_movies['Number of Languages'].max()}'\n
      The Movie with Lowest Number of Languages Ever Got is '{df_languages_least_movies['Title'][0]}' : '{df_languages_least_movies['Number of Languages'].min()}'\n
      
      The Movie with Highest Number of Languages on 'Netflix' is '{netflix_languages_most_movies['Title'][0]}' : '{netflix_languages_most_movies['Number of Languages'].max()}'\n
      The Movie with Lowest Number of Languages on 'Netflix' is '{netflix_languages_least_movies['Title'][0]}' : '{netflix_languages_least_movies['Number of Languages'].min()}'\n
      
      The Movie with Highest Number of Languages on 'Hulu' is '{hulu_languages_most_movies['Title'][0]}' : '{hulu_languages_most_movies['Number of Languages'].max()}'\n
      The Movie with Lowest Number of Languages on 'Hulu' is '{hulu_languages_least_movies['Title'][0]}' : '{hulu_languages_least_movies['Number of Languages'].min()}'\n
      
      The Movie with Highest Number of Languages on 'Prime Video' is '{prime_video_languages_most_movies['Title'][0]}' : '{prime_video_languages_most_movies['Number of Languages'].max()}'\n
      The Movie with Lowest Number of Languages on 'Prime Video' is '{prime_video_languages_least_movies['Title'][0]}' : '{prime_video_languages_least_movies['Number of Languages'].min()}'\n
      
      The Movie with Highest Number of Languages on 'Disney+' is '{disney_languages_most_movies['Title'][0]}' : '{disney_languages_most_movies['Number of Languages'].max()}'\n
      The Movie with Lowest Number of Languages on 'Disney+' is '{disney_languages_least_movies['Title'][0]}' : '{disney_languages_least_movies['Number of Languages'].min()}'\n 
      ''')
      The Movie with Highest Number of Languages Ever Got is 'The Jungle Book' : '16'

      The Movie with Lowest Number of Languages Ever Got is 'Parasomnia' : '1'

      
      The Movie with Highest Number of Languages on 'Netflix' is '2012' : '10'

      The Movie with Lowest Number of Languages on 'Netflix' is 'Iris' : '1'

      
      The Movie with Highest Number of Languages on 'Hulu' is 'Free Willy 2: The Adventure Home' : '8'

      The Movie with Lowest Number of Languages on 'Hulu' is 'The Impostor' : '1'

      
      The Movie with Highest Number of Languages on 'Prime Video' is 'Downsizing' : '7'

      The Movie with Lowest Number of Languages on 'Prime Video' is 'Parasomnia' : '1'

      
      The Movie with Highest Number of Languages on 'Disney+' is 'The Jungle Book' : '16'

      The Movie with Lowest Number of Languages on 'Disney+' is 'The Swap' : '1'
 
      
In [44]:
# Mean language count per movie, overall and per platform, rounded to two decimals.
average_all_platforms = round(df_movies_count_languages['Number of Languages'].mean(), 2)
average_netflix = round(netflix_languages_movies['Number of Languages'].mean(), 2)
average_hulu = round(hulu_languages_movies['Number of Languages'].mean(), 2)
average_prime_video = round(prime_video_languages_movies['Number of Languages'].mean(), 2)
average_disney = round(disney_languages_movies['Number of Languages'].mean(), 2)

print(f'''
      Accross All Platforms the Average Number of Languages is '{average_all_platforms}'\n
      The Average Number of Languages on 'Netflix' is '{average_netflix}'\n
      The Average Number of Languages on 'Hulu' is '{average_hulu}'\n
      The Average Number of Languages on 'Prime Video' is '{average_prime_video}'\n
      The Average Number of Languages on 'Disney+' is '{average_disney}'\n 
      ''')
      Accross All Platforms the Average Number of Languages is '1.28'

      The Average Number of Languages on 'Netflix' is '1.39'

      The Average Number of Languages on 'Hulu' is '1.47'

      The Average Number of Languages on 'Prime Video' is '1.24'

      The Average Number of Languages on 'Disney+' is '1.33'
 
      
In [45]:
# Largest per-movie language count, overall and per platform.
# The value printed is .max() of 'Number of Languages', i.e. the most languages
# listed by any single movie, so the text says that rather than a "total count"
# of languages.
print(f'''
      Across All Platforms the Highest Number of Languages for a Single Movie is '{df_movies_count_languages['Number of Languages'].max()}'\n
      The Highest Number of Languages for a Single Movie on 'Netflix' is '{netflix_languages_movies['Number of Languages'].max()}'\n
      The Highest Number of Languages for a Single Movie on 'Hulu' is '{hulu_languages_movies['Number of Languages'].max()}'\n
      The Highest Number of Languages for a Single Movie on 'Prime Video' is '{prime_video_languages_movies['Number of Languages'].max()}'\n
      The Highest Number of Languages for a Single Movie on 'Disney+' is '{disney_languages_movies['Number of Languages'].max()}'\n 
      ''')
      Accross All Platforms Total Count of Language is '16'

      Total Count of Language on 'Netflix' is '10'

      Total Count of Language on 'Hulu' is '8'

      Total Count of Language on 'Prime Video' is '7'

      Total Count of Language on 'Disney+' is '16'
 
      
In [46]:
# Left: histogram with KDE of the per-movie language count. Right: box plot of the same column.
f, ax = plt.subplots(1, 2, figsize = (20, 5))
# histplot replaces the deprecated distplot (and matches the histplot calls used
# elsewhere in this notebook); stat='density' keeps the density-scaled y-axis
# that distplot drew when a KDE was shown.
sns.histplot(x = df_movies_count_languages['Number of Languages'], bins = 20, kde = True, stat = 'density', ax = ax[0])
# Pass the column as x= explicitly so the result does not depend on how the
# installed seaborn version interprets a positional argument.
sns.boxplot(x = df_movies_count_languages['Number of Languages'], ax = ax[1])
plt.show()
In [47]:
# Overlay the language-count histograms (with KDE) of the four platforms in one figure.
plt.figure(figsize = (20, 5))
plt.title('Number of Languages s Per Platform')

# Draw order matches the order of the label list passed to plt.legend below.
platform_histograms = [
    (prime_video_languages_movies, 'lightblue'),
    (netflix_languages_movies, 'red'),
    (hulu_languages_movies, 'lightgreen'),
    (disney_languages_movies, 'darkblue'),
]
for platform_frame, histogram_colour in platform_histograms:
    sns.histplot(platform_frame['Number of Languages'], color = histogram_colour, legend = True, kde = True)

# Legend entries, then render
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [48]:
# Expand the comma-separated Language column into one row per (movie, language).
# Series.explode does this directly and avoids building a pd.Series for every
# row via apply(pd.Series), which is very slow on a frame with thousands of rows.
df_lan = df_movies_language['Language'].str.split(',').explode()
df_lan.name = 'Language'
# Replace the original column with the exploded one. The join is on the row
# index, so each movie row is repeated once per language it lists and the
# Language column ends up last, as before.
df_movies_language = df_movies_language.drop(columns = 'Language').join(df_lan)
df_movies_language.drop_duplicates(inplace = True)
In [49]:
# Preview: a movie with several languages now occupies one row per language.
df_movies_language.head(5)
Out[49]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Language
0 1 Inception 2010 13 8.8 87 Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix English
0 1 Inception 2010 13 8.8 87 Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix Japanese
0 1 Inception 2010 13 8.8 87 Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix French
1 2 The Matrix 1999 16 8.7 88 Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States Thomas A. Anderson is a man living two lives. ... 136 movie 1 0 0 0 0 Netflix English
2 3 Avengers: Infinity War 2018 13 8.4 85 Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States As the Avengers and their allies have continue... 149 movie 1 0 0 0 0 Netflix English
In [50]:
# Per-language totals: number of titles, plus the sum of each platform flag.
movies_by_language = df_movies_language.groupby('Language')
language_count = movies_by_language['Title'].count()
language_movies = movies_by_language[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()

# Combine both into one table, name the count column and order by it, largest first.
language_data_movies = (
    pd.concat([language_count, language_movies], axis = 1)
    .reset_index()
    .rename(columns = {'Title' : 'Movies Count'})
    .sort_values(by = 'Movies Count', ascending = False)
)
In [51]:
# Ten languages with the largest Movies Count - All Platforms Combined
language_data_movies.sort_values(by = 'Movies Count', ascending = False).head(10)
Out[51]:
Language Movies Count Netflix Hulu Prime Video Disney+
46 English 13524 2370 950 10146 556
156 Spanish 953 356 88 528 23
53 French 875 239 102 553 28
69 Hindi 720 368 6 410 7
58 German 526 118 53 361 16
77 Italian 491 87 32 378 11
102 Mandarin 371 91 21 276 9
78 Japanese 340 101 49 199 5
139 Russian 238 60 24 159 6
11 Arabic 196 91 12 93 5
In [52]:
# Bar chart of the first 50 rows of the language table: languages on the
# x-axis, Movies Count on the y-axis and as the bar colour.
fig = px.bar(
    title = 'Major Languages : All Platforms',
    x = language_data_movies['Language'].head(50),
    y = language_data_movies['Movies Count'].head(50),
    color = language_data_movies['Movies Count'].head(50),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Language', 'y' : 'Movies Count'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [53]:
# Languages ordered by Movies Count, largest first, with rows renumbered from 0.
df_language_high_movies = (
    language_data_movies
    .sort_values(by = 'Movies Count', ascending = False)
    .reset_index(drop = True)
)

print('\nLanguage with Highest Ever Movies Count are : All Platforms Combined\n')
df_language_high_movies.head(5)
Language with Highest Ever Movies Count are : All Platforms Combined

Out[53]:
Language Movies Count Netflix Hulu Prime Video Disney+
0 English 13524 2370 950 10146 556
1 Spanish 953 356 88 528 23
2 French 875 239 102 553 28
3 Hindi 720 368 6 410 7
4 German 526 118 53 361 16
In [54]:
# Bar chart of the first 15 rows of the descending language ranking:
# languages on the y-axis, Movies Count on the x-axis and as the bar colour.
fig = px.bar(
    title = 'Language with Highest Movies : All Platforms',
    x = df_language_high_movies['Movies Count'].head(15),
    y = df_language_high_movies['Language'].head(15),
    color = df_language_high_movies['Movies Count'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Language', 'x' : 'Movies Count'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [55]:
# Languages ordered by Movies Count, smallest first, with rows renumbered from 0.
df_language_low_movies = (
    language_data_movies
    .sort_values(by = 'Movies Count', ascending = True)
    .reset_index(drop = True)
)

print('\nLanguage with Lowest Ever Movies Count are : All Platforms Combined\n')
df_language_low_movies.head(5)
Language with Lowest Ever Movies Count are : All Platforms Combined

Out[55]:
Language Movies Count Netflix Hulu Prime Video Disney+
0 Lao 1 0 0 1 0
1 Belarusian 1 0 0 1 0
2 Bemba 1 0 1 0 0
3 Hakka 1 0 0 1 0
4 Berber languages 1 1 0 0 0
In [56]:
# Bar chart of the first 15 rows of the ascending language ranking:
# languages on the y-axis, Movies Count on the x-axis and as the bar colour.
fig = px.bar(
    title = 'Language with Lowest Movies Count : All Platforms',
    x = df_language_low_movies['Movies Count'].head(15),
    y = df_language_low_movies['Language'].head(15),
    color = df_language_low_movies['Movies Count'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'Language', 'x' : 'Movies Count'},
)

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [57]:
# Summary of the language table: number of distinct languages, the five with
# the most movies, and the languages in row 0 of the descending / ascending rankings.
# The wording refers to languages (the original text spoke of "Any Movie" although
# the values reported are languages and their movie counts).
# NOTE: row 0 of the ascending ranking is only the first of possibly many
# languages that share the lowest count.
print(f'''
      Total '{language_data_movies['Language'].unique().shape[0]}' unique Languages were Given, the Top 5 by Movies Count were,\n
      
      {language_data_movies.sort_values(by = 'Movies Count', ascending = False)['Language'].unique()[:5]}\n
 
      The Language with the Highest Movies Count is '{df_language_high_movies['Language'][0]}' : '{df_language_high_movies['Movies Count'].max()}'\n
 
      The Language with the Lowest Movies Count is '{df_language_low_movies['Language'][0]}' : '{df_language_low_movies['Movies Count'].min()}'\n
      ''')
      Total '183' unique Language Count s were Given, They were Like this,

      
      ['English' 'Spanish' 'French' 'Hindi' 'German']

 
      The Highest Ever Movies Count Ever Any Movie Got is 'English' : '13524'

 
      The Lowest Ever Movies Count Ever Any Movie Got is 'Lao' : '1'

      
In [58]:
# Pie chart of Movies Count for the first ten rows of the language table.
fig = px.pie(
    language_data_movies.head(10),
    names = 'Language',
    values = 'Movies Count',
    color_discrete_sequence = px.colors.sequential.Teal,
)
fig.update_traces(
    title = 'Movies Count based on Language',
    textposition = 'inside',
    textinfo = 'percent+label',
)
fig.show()
In [59]:
# netflix_language_movies = language_data_movies[language_data_movies['Netflix'] !=  0].sort_values(by = 'Netflix', ascending = False).reset_index()
# netflix_language_movies = netflix_language_movies.drop(['index', 'Hulu', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
# Language table ordered by the Netflix column, descending and ascending,
# each with rows renumbered from 0. Both start from df_language_high_movies.
netflix_language_high_movies = (
    df_language_high_movies
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
)

netflix_language_low_movies = (
    df_language_high_movies
    .sort_values(by = 'Netflix', ascending = True)
    .reset_index(drop = True)
)

netflix_language_high_movies.head(5)
Out[59]:
Language Movies Count Netflix Hulu Prime Video Disney+
0 English 13524 2370 950 10146 556
1 Hindi 720 368 6 410 7
2 Spanish 953 356 88 528 23
3 French 875 239 102 553 28
4 German 526 118 53 361 16
In [60]:
# Bar chart of the 15 languages with the most movies on Netflix.
# The arrays are passed without a data frame, so `labels` is keyed by the
# px argument name: 'x' carries the languages and 'y' the movie counts.
fig = px.bar(x = netflix_language_high_movies['Language'][:15],
             y = netflix_language_high_movies['Netflix'][:15], 
             color = netflix_language_high_movies['Netflix'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'x' : 'Language', 'y' : 'Movies Count'},
             title  = 'Language with Highest Movies : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [61]:
# Rank every language by its Hulu movie count, in both directions.
# reset_index(drop = True) renumbers the rows from 0 without keeping the
# previous index as a column.
hulu_language_high_movies = (
    df_language_high_movies
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
)

hulu_language_low_movies = (
    df_language_high_movies
    .sort_values(by = 'Hulu', ascending = True)
    .reset_index(drop = True)
)

hulu_language_high_movies.head(5)
Out[61]:
Language Movies Count Netflix Hulu Prime Video Disney+
0 English 13524 2370 950 10146 556
1 French 875 239 102 553 28
2 Spanish 953 356 88 528 23
3 German 526 118 53 361 16
4 Japanese 340 101 49 199 5
In [62]:
# Bar chart of the 15 languages with the most movies on Hulu.
# The arrays are passed without a data frame, so `labels` is keyed by the
# px argument name: 'x' carries the languages and 'y' the movie counts.
fig = px.bar(x = hulu_language_high_movies['Language'][:15],
             y = hulu_language_high_movies['Hulu'][:15], 
             color = hulu_language_high_movies['Hulu'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'x' : 'Language', 'y' : 'Movies Count'},
             title  = 'Language with Highest Movies : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [63]:
# Rank every language by its Prime Video movie count, in both directions.
# reset_index(drop = True) renumbers the rows from 0 without keeping the
# previous index as a column.
prime_video_language_high_movies = (
    df_language_high_movies
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
)

prime_video_language_low_movies = (
    df_language_high_movies
    .sort_values(by = 'Prime Video', ascending = True)
    .reset_index(drop = True)
)

prime_video_language_high_movies.head(5)
Out[63]:
Language Movies Count Netflix Hulu Prime Video Disney+
0 English 13524 2370 950 10146 556
1 French 875 239 102 553 28
2 Spanish 953 356 88 528 23
3 Hindi 720 368 6 410 7
4 Italian 491 87 32 378 11
In [64]:
# Bar chart of the 15 languages with the most movies on Prime Video.
# The arrays are passed without a data frame, so `labels` is keyed by the
# px argument name: 'x' carries the languages and 'y' the movie counts.
fig = px.bar(x = prime_video_language_high_movies['Language'][:15],
             y = prime_video_language_high_movies['Prime Video'][:15], 
             color = prime_video_language_high_movies['Prime Video'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'x' : 'Language', 'y' : 'Movies Count'},
             title  = 'Language with Highest Movies : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [65]:
# Rank every language by its Disney+ movie count, in both directions.
# reset_index(drop = True) renumbers the rows from 0 without keeping the
# previous index as a column.
disney_language_high_movies = (
    df_language_high_movies
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
)

disney_language_low_movies = (
    df_language_high_movies
    .sort_values(by = 'Disney+', ascending = True)
    .reset_index(drop = True)
)

disney_language_high_movies.head(5)
Out[65]:
Language Movies Count Netflix Hulu Prime Video Disney+
0 English 13524 2370 950 10146 556
1 French 875 239 102 553 28
2 Spanish 953 356 88 528 23
3 German 526 118 53 361 16
4 Italian 491 87 32 378 11
In [66]:
# Bar chart of the 15 languages with the most movies on Disney+.
# The arrays are passed without a data frame, so `labels` is keyed by the
# px argument name: 'x' carries the languages and 'y' the movie counts.
fig = px.bar(x = disney_language_high_movies['Language'][:15],
             y = disney_language_high_movies['Disney+'][:15], 
             color = disney_language_high_movies['Disney+'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'x' : 'Language', 'y' : 'Movies Count'},
             title  = 'Language with Highest Movies : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [67]:
# Distribution of the per-language 'Movies Count' over all platforms:
# histogram with a KDE curve on the left, box plot on the right.
f, ax = plt.subplots(1, 2 , figsize = (20, 5))

# sns.distplot is deprecated; histplot with stat = 'density' draws the same
# density-normalised histogram, and cut = 3 mirrors how far distplot extended
# its KDE curve past the data.
sns.histplot(language_data_movies['Movies Count'], bins = 20, kde = True, stat = 'density', kde_kws = {'cut' : 3}, ax = ax[0])

# Pass the series as `x` by keyword so the box is horizontal no matter how the
# installed seaborn version interprets the first positional argument.
sns.boxplot(x = language_data_movies['Movies Count'], ax = ax[1])
plt.show()
In [68]:
# One frame per streaming platform: keep only the languages whose count on
# that platform is non-zero, order them from most to fewest movies, renumber
# the rows and drop the other platforms' columns and the overall count.
netflix_language_movies = (
    language_data_movies[language_data_movies['Netflix'] != 0]
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Hulu', 'Prime Video', 'Disney+', 'Movies Count'])
)

hulu_language_movies = (
    language_data_movies[language_data_movies['Hulu'] != 0]
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Prime Video', 'Disney+', 'Movies Count'])
)

prime_video_language_movies = (
    language_data_movies[language_data_movies['Prime Video'] != 0]
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Disney+', 'Movies Count'])
)

disney_language_movies = (
    language_data_movies[language_data_movies['Disney+'] != 0]
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Prime Video', 'Movies Count'])
)
In [69]:
# Overlaid histograms (with KDE) of the per-language movie counts, one per platform.
plt.figure(figsize = (20, 5))
plt.title('Language Movies Count Per Platform')

# (series, colour) pairs in drawing order; only the first 50 rows of each are plotted.
histogram_inputs = [
    (disney_language_movies['Disney+'], 'darkblue'),
    (prime_video_language_movies['Prime Video'], 'lightblue'),
    (netflix_language_movies['Netflix'], 'red'),
    (hulu_language_movies['Hulu'], 'lightgreen'),
]
for platform_counts, shade in histogram_inputs:
    sns.histplot(platform_counts[:50], color = shade, legend = True, kde = True)

# Legend entries follow the drawing order above.
plt.legend(['Disney+', 'Prime Video', 'Netflix', 'Hulu'])
plt.show()
In [70]:
# Print the language with the highest and lowest movie count, first for all
# platforms combined and then for Netflix, Hulu, Prime Video and Disney+.
# Each language name is the entry labelled 0 in the 'Language' column of the
# corresponding *_high_movies / *_low_movies frame; each number is the
# max() / min() of that frame's count column.
print(f'''
      The Language with Highest Movies Count Ever Got is '{df_language_high_movies['Language'][0]}' : '{df_language_high_movies['Movies Count'].max()}'\n
      The Language with Lowest Movies Count Ever Got is '{df_language_low_movies['Language'][0]}' : '{df_language_low_movies['Movies Count'].min()}'\n
      
      The Language with Highest Movies Count on 'Netflix' is '{netflix_language_high_movies['Language'][0]}' : '{netflix_language_high_movies['Netflix'].max()}'\n
      The Language with Lowest Movies Count on 'Netflix' is '{netflix_language_low_movies['Language'][0]}' : '{netflix_language_low_movies['Netflix'].min()}'\n
      
      The Language with Highest Movies Count on 'Hulu' is '{hulu_language_high_movies['Language'][0]}' : '{hulu_language_high_movies['Hulu'].max()}'\n
      The Language with Lowest Movies Count on 'Hulu' is '{hulu_language_low_movies['Language'][0]}' : '{hulu_language_low_movies['Hulu'].min()}'\n
      
      The Language with Highest Movies Count on 'Prime Video' is '{prime_video_language_high_movies['Language'][0]}' : '{prime_video_language_high_movies['Prime Video'].max()}'\n
      The Language with Lowest Movies Count on 'Prime Video' is '{prime_video_language_low_movies['Language'][0]}' : '{prime_video_language_low_movies['Prime Video'].min()}'\n
      
      The Language with Highest Movies Count on 'Disney+' is '{disney_language_high_movies['Language'][0]}' : '{disney_language_high_movies['Disney+'].max()}'\n
      The Language with Lowest Movies Count on 'Disney+' is '{disney_language_low_movies['Language'][0]}' : '{disney_language_low_movies['Disney+'].min()}'\n 
      ''')
      The Language with Highest Movies Count Ever Got is 'English' : '13524'

      The Language with Lowest Movies Count Ever Got is 'Lao' : '1'

      
      The Language with Highest Movies Count on 'Netflix' is 'English' : '2370'

      The Language with Lowest Movies Count on 'Netflix' is 'North American Indian' : '0'

      
      The Language with Highest Movies Count on 'Hulu' is 'English' : '950'

      The Language with Lowest Movies Count on 'Hulu' is 'North American Indian' : '0'

      
      The Language with Highest Movies Count on 'Prime Video' is 'English' : '10146'

      The Language with Lowest Movies Count on 'Prime Video' is 'Greenlandic' : '0'

      
      The Language with Highest Movies Count on 'Disney+' is 'English' : '556'

      The Language with Lowest Movies Count on 'Disney+' is 'Lao' : '0'
 
      
In [71]:
# Violin plot of the 'Movies Count' of the first 100 rows of
# language_data_movies (all platforms combined).
# Only options that violinplot defines are passed; `legend`, `kde` and `shade`
# belong to other seaborn plotters and are not meaningful here.
plt.figure(figsize = (20, 5))
plt.title('Language with Movies Count for All Platforms')
sns.violinplot(x = language_data_movies['Movies Count'][:100], color = 'gold')
plt.show()
In [72]:
# Violin plots of the per-language movie counts (first 100 rows of each
# platform frame), drawn as two figures with two panels each.

# Figure 1: Netflix and Hulu.
f1, ax1 = plt.subplots(1, 2, figsize = (20, 5))
upper_panels = [
    (netflix_language_movies, 'Netflix', 'red'),
    (hulu_language_movies, 'Hulu', 'lightgreen'),
]
for panel, (platform_frame, platform, shade) in zip(ax1, upper_panels):
    sns.violinplot(x = platform_frame[platform][:100], color = shade, ax = panel)

# Figure 2: Prime Video and Disney+.
f2, ax2 = plt.subplots(1, 2, figsize = (20, 5))
lower_panels = [
    (prime_video_language_movies, 'Prime Video', 'lightblue'),
    (disney_language_movies, 'Disney+', 'darkblue'),
]
for panel, (platform_frame, platform, shade) in zip(ax2, lower_panels):
    sns.violinplot(x = platform_frame[platform][:100], color = shade, ax = panel)

plt.show()
In [73]:
# Print the mean movie count per language, rounded to two decimals:
# first over language_data_movies['Movies Count'], then over each
# per-platform frame's own count column.
print(f'''
      Accross All Platforms the Average Movies Count of Language is '{round(language_data_movies['Movies Count'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Language on 'Netflix' is '{round(netflix_language_movies['Netflix'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Language on 'Hulu' is '{round(hulu_language_movies['Hulu'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Language on 'Prime Video' is '{round(prime_video_language_movies['Prime Video'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Language on 'Disney+' is '{round(disney_language_movies['Disney+'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Movies Count of Language is '115.41'

      The Average Movies Count of Language on 'Netflix' is '40.8'

      The Average Movies Count of Language on 'Hulu' is '20.52'

      The Average Movies Count of Language on 'Prime Video' is '95.85'

      The Average Movies Count of Language on 'Disney+' is '12.81'
 
      
In [74]:
# Print the number of distinct values in the 'Language' column of
# language_data_movies and of each per-platform frame.
print(f'''
      Accross All Platforms Total Count of Language is '{language_data_movies['Language'].unique().shape[0]}'\n
      Total Count of Language on 'Netflix' is '{netflix_language_movies['Language'].unique().shape[0]}'\n
      Total Count of Language on 'Hulu' is '{hulu_language_movies['Language'].unique().shape[0]}'\n
      Total Count of Language on 'Prime Video' is '{prime_video_language_movies['Language'].unique().shape[0]}'\n
      Total Count of Language on 'Disney+' is '{disney_language_movies['Language'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Language is '183'

      Total Count of Language on 'Netflix' is '120'

      Total Count of Language on 'Hulu' is '75'

      Total Count of Language on 'Prime Video' is '155'

      Total Count of Language on 'Disney+' is '58'
 
      
In [75]:
# Movie counts of the first ten rows of language_data_movies, one line per
# platform on a shared axis. Each line carries a label so the legend tells
# the four platforms apart.
plt.figure(figsize = (20, 5))
sns.lineplot(x = language_data_movies['Language'][:10], y = language_data_movies['Netflix'][:10], color = 'red', label = 'Netflix')
sns.lineplot(x = language_data_movies['Language'][:10], y = language_data_movies['Hulu'][:10], color = 'lightgreen', label = 'Hulu')
sns.lineplot(x = language_data_movies['Language'][:10], y = language_data_movies['Prime Video'][:10], color = 'lightblue', label = 'Prime Video')
sns.lineplot(x = language_data_movies['Language'][:10], y = language_data_movies['Disney+'][:10], color = 'darkblue', label = 'Disney+')
plt.xlabel('Language', fontsize = 20)
plt.ylabel('Movies Count', fontsize = 20)
plt.legend()
plt.show()
In [76]:
# 2 x 2 grid of line plots: for each platform, its movie count (x) against the
# language (y) for the first ten rows of language_data_movies.
fig, axes = plt.subplots(2, 2, figsize = (20 , 10))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
line_colors = ['red', 'lightgreen', 'lightblue', 'darkblue']

# axes.flat walks the grid row by row, matching the order of `labels`.
n_l_ax1, h_l_ax2, p_l_ax3, d_l_ax4 = [
    sns.lineplot(y = language_data_movies['Language'][:10],
                 x = language_data_movies[platform][:10],
                 color = shade,
                 ax = panel)
    for platform, shade, panel in zip(labels, line_colors, axes.flat)
]

# Title every panel with its platform name.
for drawn_axis, platform in zip((n_l_ax1, h_l_ax2, p_l_ax3, d_l_ax4), labels):
    drawn_axis.title.set_text(platform)

plt.show()
In [77]:
# 2 x 2 grid of horizontal bar plots: the first ten rows of each per-platform
# language frame, language on the y-axis and that platform's count on the x-axis.
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
platform_frames = [netflix_language_movies, hulu_language_movies, prime_video_language_movies, disney_language_movies]
bar_palettes = ['Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r']

# axes.flat walks the grid row by row, matching the order of `labels`.
n_l_ax1, h_l_ax2, p_l_ax3, d_l_ax4 = [
    sns.barplot(y = platform_frame['Language'][:10],
                x = platform_frame[platform][:10],
                palette = shades,
                ax = panel)
    for platform_frame, platform, shades, panel in zip(platform_frames, labels, bar_palettes, axes.flat)
]

# Title every panel with its platform name.
for drawn_axis, platform in zip((n_l_ax1, h_l_ax2, p_l_ax3, d_l_ax4), labels):
    drawn_axis.title.set_text(platform)

plt.show()
In [78]:
# Overlaid kernel density curves of the ten largest per-language movie counts
# of each platform frame.
plt.figure(figsize = (20, 5))
plt.title('Language  Movies Count Per Platform')

# (series, colour) pairs in drawing order; only the first 10 rows of each are used.
density_inputs = [
    (netflix_language_movies['Netflix'], 'red'),
    (hulu_language_movies['Hulu'], 'green'),
    (prime_video_language_movies['Prime Video'], 'lightblue'),
    (disney_language_movies['Disney+'], 'darkblue'),
]
for platform_counts, shade in density_inputs:
    sns.kdeplot(platform_counts[:10], color = shade, legend = True)

# Legend entries follow the drawing order above.
plt.legend(['Netflix', 'Hulu', 'Prime Video', 'Disney+'])
plt.show()
In [79]:
# 2 x 2 grid of horizontal bar plots over the first ten rows of
# language_data_movies: language on the y-axis, one platform's count per panel.
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
bar_palettes = ['Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r']

# axes.flat walks the grid row by row, matching the order of `labels`.
n_l_ax1, h_l_ax2, p_l_ax3, d_l_ax4 = [
    sns.barplot(y = language_data_movies['Language'][:10],
                x = language_data_movies[platform][:10],
                palette = shades,
                ax = panel)
    for platform, shades, panel in zip(labels, bar_palettes, axes.flat)
]

# Title every panel with its platform name.
for drawn_axis, platform in zip((n_l_ax1, h_l_ax2, p_l_ax3, d_l_ax4), labels):
    drawn_axis.title.set_text(platform)

plt.show()
In [80]:
# Keep only multi-language titles, modifying df_movies_mixed_languages in place.
# Step 1: remove the rows whose 'Language' is the literal string "NA".
rows_without_language = df_movies_mixed_languages[df_movies_mixed_languages['Language'] == "NA"].index
df_movies_mixed_languages.drop(index = rows_without_language, inplace = True)

# Step 2: remove the rows that list exactly one language.
rows_with_single_language = df_movies_mixed_languages[df_movies_mixed_languages['Number of Languages'] == 1].index
df_movies_mixed_languages.drop(index = rows_with_single_language, inplace = True)
In [81]:
# Preview the first five remaining rows of df_movies_mixed_languages.
df_movies_mixed_languages.iloc[:5]
Out[81]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Languages
0 1 Inception 2010 13 8.8 87 Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom ... Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix 3
5 6 Spider-Man: Into the Spider-Verse 2018 7 8.4 97 Bob Persichetti,Peter Ramsey,Rodney Rothman Shameik Moore,Jake Johnson,Hailee Steinfeld,Ma... Animation,Action,Adventure,Family,Sci-Fi United States ... Phil Lord and Christopher Miller, the creative... 117 movie 1 0 0 0 0 Netflix 2
6 7 The Pianist 2002 16 8.5 95 Roman Polanski Adrien Brody,Emilia Fox,Michal Zebrowski,Ed St... Biography,Drama,Music,War United Kingdom,France,Poland,Germany,United St... ... In this adaptation of the autobiography "The P... 150 movie 1 0 1 0 0 Netflix 3
7 8 Django Unchained 2012 16 8.4 87 Quentin Tarantino Jamie Foxx,Christoph Waltz,Leonardo DiCaprio,K... Drama,Western United States ... In 1858, a bounty-hunter named King Schultz se... 165 movie 1 0 0 0 0 Netflix 4
8 9 Raiders of the Lost Ark 1981 7 8.4 95 Steven Spielberg Harrison Ford,Karen Allen,Paul Freeman,Ronald ... Action,Adventure United States ... The year is 1936. An archeology professor name... 115 movie 1 0 0 0 0 Netflix 6

5 rows × 21 columns

In [82]:
# Aggregate the multi-language titles per language combination:
# number of titles plus the per-platform sums, ordered by title count.
titles_by_language = df_movies_mixed_languages.groupby('Language')

mixed_languages_count = titles_by_language['Title'].count()
mixed_languages_movies = titles_by_language[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()

mixed_languages_data_movies = (
    pd.concat([mixed_languages_count, mixed_languages_movies], axis = 1)
    .reset_index()
    .rename(columns = {'Title' : 'Movies Count', 'Language' : 'Mixed Language'})
)
mixed_languages_data_movies = mixed_languages_data_movies.sort_values(by = 'Movies Count', ascending = False)
In [83]:
# Preview the five language combinations with the most titles.
mixed_languages_data_movies.iloc[:5]
Out[83]:
Mixed Language Movies Count Netflix Hulu Prime Video Disney+
510 English,Spanish 306 68 39 208 13
181 English,French 201 50 28 122 17
244 English,German 87 16 2 66 8
334 English,Italian 78 11 6 58 3
739 Hindi,English 64 39 1 31 0
In [84]:
# Ten language combinations with the most movies, all platforms combined.
mixed_languages_by_count = mixed_languages_data_movies.sort_values(by = 'Movies Count', ascending = False)
mixed_languages_by_count.head(10)
Out[84]:
Mixed Language Movies Count Netflix Hulu Prime Video Disney+
510 English,Spanish 306 68 39 208 13
181 English,French 201 50 28 122 17
244 English,German 87 16 2 66 8
334 English,Italian 78 11 6 58 3
739 Hindi,English 64 39 1 31 0
986 Spanish,English 59 24 4 32 0
626 French,English 45 7 6 32 0
355 English,Japanese 42 13 4 25 1
468 English,Russian 41 13 6 24 0
605 Filipino,Tagalog 36 22 0 16 0
In [85]:
# Language combinations ordered from most to fewest movies, with the rows
# renumbered from 0 (the previous index is discarded, not kept as a column).
df_mixed_languages_high_movies = (
    mixed_languages_data_movies
    .sort_values(by = 'Movies Count', ascending = False)
    .reset_index(drop = True)
)

print('\nMixed Language with Highest Ever Movies Count are : All Platforms Combined\n')
df_mixed_languages_high_movies.head(5)
Mixed Language with Highest Ever Movies Count are : All Platforms Combined

Out[85]:
Mixed Language Movies Count Netflix Hulu Prime Video Disney+
0 English,Spanish 306 68 39 208 13
1 English,French 201 50 28 122 17
2 English,German 87 16 2 66 8
3 English,Italian 78 11 6 58 3
4 Hindi,English 64 39 1 31 0
In [86]:
# Horizontal bar chart of the 15 language combinations with the most movies.
# The y-axis holds the mixed-language combination and the x-axis its movie
# count, so the axis labels and title name exactly those quantities.
fig = px.bar(y = df_mixed_languages_high_movies['Mixed Language'][:15],
             x = df_mixed_languages_high_movies['Movies Count'][:15], 
             color = df_mixed_languages_high_movies['Movies Count'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Mixed Language', 'x' : 'Movies Count'},
             title  = 'Mixed Language with Highest Movies Count : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [87]:
# Language combinations ordered from fewest to most movies, with the rows
# renumbered from 0 (the previous index is discarded, not kept as a column).
df_mixed_languages_low_movies = (
    mixed_languages_data_movies
    .sort_values(by = 'Movies Count', ascending = True)
    .reset_index(drop = True)
)

print('\nMixed Language with Lowest Ever Movies Count are : All Platforms Combined\n')
df_mixed_languages_low_movies.head(5)
Mixed Language with Lowest Ever Movies Count are : All Platforms Combined

Out[87]:
Mixed Language Movies Count Netflix Hulu Prime Video Disney+
0 Italian,Hungarian,Latin 1 0 0 1 0
1 English,French,Tibetan,Mandarin,Russian,Hindi,... 1 1 0 0 0
2 English,French,Turkish,German,Greek,Italian 1 0 0 1 0
3 English,French,Ukrainian 1 1 0 0 0
4 English,Gallegan 1 0 0 1 0
In [88]:
# Horizontal bar chart of the 15 language combinations with the fewest movies.
# The y-axis holds the mixed-language combination and the x-axis its movie
# count, so the axis labels and title name exactly those quantities.
fig = px.bar(y = df_mixed_languages_low_movies['Mixed Language'][:15],
             x = df_mixed_languages_low_movies['Movies Count'][:15], 
             color = df_mixed_languages_low_movies['Movies Count'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Mixed Language', 'x' : 'Movies Count'},
             title  = 'Mixed Language with Lowest Movies Count : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [89]:
# Text summary for the mixed-language analysis: the non-null 'Language' count
# of df_movies_languages, the number of distinct language combinations, the
# five combinations with the most movies, and the combinations holding the
# highest / lowest movie count (entry labelled 0 of the high / low frames).
print(f'''
      Total '{df_movies_languages['Language'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see Movies from Total '{mixed_languages_data_movies['Mixed Language'].unique().shape[0]}' Mixed Language, They were Like this, \n
 
      {mixed_languages_data_movies.sort_values(by = 'Movies Count', ascending = False)['Mixed Language'].head(5).unique()} etc. \n
 
      The Mixed Language with Highest Movies Count have '{mixed_languages_data_movies['Movies Count'].max()}' Movies Available is '{df_mixed_languages_high_movies['Mixed Language'][0]}', &\n
      The Mixed Language with Lowest Movies Count have '{mixed_languages_data_movies['Movies Count'].min()}' Movies Available is '{df_mixed_languages_low_movies['Mixed Language'][0]}'
      ''')
      Total '16486' Titles are available on All Platforms, out of which

      You Can Choose to see Movies from Total '1091' Mixed Language, They were Like this, 

 
      ['English,Spanish' 'English,French' 'English,German' 'English,Italian'
 'Hindi,English'] etc. 

 
      The Mixed Language with Highest Movies Count have '306' Movies Available is 'English,Spanish', &

      The Mixed Language with Lowest Movies Count have '1' Movies Available is 'Italian,Hungarian,Latin'
      
In [90]:
# Pie chart of the 'Movies Count' share for the first ten rows of mixed_languages_data_movies.
first_ten_mixed_languages = mixed_languages_data_movies.head(10)
fig = px.pie(
    first_ten_mixed_languages,
    values = 'Movies Count',
    names = 'Mixed Language',
    color_discrete_sequence = px.colors.sequential.Teal,
)
# Put percentage and combination name inside each slice and title the trace.
fig.update_traces(
    title = 'Movies Count based on Mixed Language',
    textinfo = 'percent+label',
    textposition = 'inside',
)
fig.show()
In [91]:
# Rank every language combination by its Netflix movie count, in both directions.
# reset_index(drop = True) renumbers the rows from 0 without keeping the
# previous index as a column.
netflix_mixed_languages_high_movies = (
    df_mixed_languages_high_movies
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
)

netflix_mixed_languages_low_movies = (
    df_mixed_languages_high_movies
    .sort_values(by = 'Netflix', ascending = True)
    .reset_index(drop = True)
)

netflix_mixed_languages_high_movies.head(5)
Out[91]:
Mixed Language Movies Count Netflix Hulu Prime Video Disney+
0 English,Spanish 306 68 39 208 13
1 English,French 201 50 28 122 17
2 Hindi,English 64 39 1 31 0
3 Spanish,English 59 24 4 32 0
4 Filipino,Tagalog 36 22 0 16 0
In [92]:
# Rank every language combination by its Hulu movie count, in both directions.
# reset_index(drop = True) renumbers the rows from 0 without keeping the
# previous index as a column.
hulu_mixed_languages_high_movies = (
    df_mixed_languages_high_movies
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
)

hulu_mixed_languages_low_movies = (
    df_mixed_languages_high_movies
    .sort_values(by = 'Hulu', ascending = True)
    .reset_index(drop = True)
)

hulu_mixed_languages_high_movies.head(5)
Out[92]:
Mixed Language Movies Count Netflix Hulu Prime Video Disney+
0 English,Spanish 306 68 39 208 13
1 English,French 201 50 28 122 17
2 English,Mandarin 36 12 8 20 3
3 English,Italian 78 11 6 58 3
4 French,English 45 7 6 32 0
In [93]:
# Rank every language combination by its Prime Video movie count, in both directions.
# reset_index(drop = True) renumbers the rows from 0 without keeping the
# previous index as a column.
prime_video_mixed_languages_high_movies = (
    df_mixed_languages_high_movies
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
)

prime_video_mixed_languages_low_movies = (
    df_mixed_languages_high_movies
    .sort_values(by = 'Prime Video', ascending = True)
    .reset_index(drop = True)
)

prime_video_mixed_languages_high_movies.head(5)
Out[93]:
Mixed Language Movies Count Netflix Hulu Prime Video Disney+
0 English,Spanish 306 68 39 208 13
1 English,French 201 50 28 122 17
2 English,German 87 16 2 66 8
3 English,Italian 78 11 6 58 3
4 Spanish,English 59 24 4 32 0
In [94]:
# Rank every language combination by its Disney+ movie count, in both directions.
# reset_index(drop = True) renumbers the rows from 0 without keeping the
# previous index as a column.
disney_mixed_languages_high_movies = (
    df_mixed_languages_high_movies
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
)

disney_mixed_languages_low_movies = (
    df_mixed_languages_high_movies
    .sort_values(by = 'Disney+', ascending = True)
    .reset_index(drop = True)
)

disney_mixed_languages_high_movies.head(5)
Out[94]:
Mixed Language Movies Count Netflix Hulu Prime Video Disney+
0 English,French 201 50 28 122 17
1 English,Spanish 306 68 39 208 13
2 English,German 87 16 2 66 8
3 English,Portuguese 19 2 3 14 4
4 English,Italian 78 11 6 58 3
In [95]:
# Distribution of the 'Movies Count' per language combination:
# histogram with a KDE curve on the left, box plot on the right.
f, ax = plt.subplots(1, 2 , figsize = (20, 5))

# sns.distplot is deprecated; histplot with stat = 'density' draws the same
# density-normalised histogram, and cut = 3 mirrors how far distplot extended
# its KDE curve past the data.
sns.histplot(mixed_languages_data_movies['Movies Count'], bins = 20, kde = True, stat = 'density', kde_kws = {'cut' : 3}, ax = ax[0])

# Pass the series as `x` by keyword so the box is horizontal no matter how the
# installed seaborn version interprets the first positional argument.
sns.boxplot(x = mixed_languages_data_movies['Movies Count'], ax = ax[1])
plt.show()
In [96]:
# One frame per streaming platform: keep only the language combinations whose
# count on that platform is non-zero, order them from most to fewest movies,
# renumber the rows and drop the other platforms' columns and the overall count.
netflix_mixed_languages_movies = (
    mixed_languages_data_movies[mixed_languages_data_movies['Netflix'] != 0]
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Hulu', 'Prime Video', 'Disney+', 'Movies Count'])
)

hulu_mixed_languages_movies = (
    mixed_languages_data_movies[mixed_languages_data_movies['Hulu'] != 0]
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Prime Video', 'Disney+', 'Movies Count'])
)

prime_video_mixed_languages_movies = (
    mixed_languages_data_movies[mixed_languages_data_movies['Prime Video'] != 0]
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Disney+', 'Movies Count'])
)

disney_mixed_languages_movies = (
    mixed_languages_data_movies[mixed_languages_data_movies['Disney+'] != 0]
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Prime Video', 'Movies Count'])
)
In [97]:
# Overlaid histograms (with KDE) of the movie counts per language combination, one per platform.
plt.figure(figsize = (20, 5))
plt.title('Mixed Language Movies Count Per Platform')

# (series, colour) pairs in drawing order; only the first 100 rows of each are plotted.
histogram_inputs = [
    (prime_video_mixed_languages_movies['Prime Video'], 'lightblue'),
    (netflix_mixed_languages_movies['Netflix'], 'red'),
    (hulu_mixed_languages_movies['Hulu'], 'lightgreen'),
    (disney_mixed_languages_movies['Disney+'], 'darkblue'),
]
for platform_counts, shade in histogram_inputs:
    sns.histplot(platform_counts[:100], color = shade, legend = True, kde = True)

# Legend entries follow the drawing order above.
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [98]:
# Print the language combination with the highest and lowest movie count,
# first for all platforms combined and then for Netflix, Hulu, Prime Video
# and Disney+. Each name is the entry labelled 0 in the 'Mixed Language'
# column of the corresponding *_high_movies / *_low_movies frame; each number
# is the max() / min() of that frame's count column.
print(f'''
      The Mixed Language with Highest Movies Count Ever Got is '{df_mixed_languages_high_movies['Mixed Language'][0]}' : '{df_mixed_languages_high_movies['Movies Count'].max()}'\n
      The Mixed Language with Lowest Movies Count Ever Got is '{df_mixed_languages_low_movies['Mixed Language'][0]}' : '{df_mixed_languages_low_movies['Movies Count'].min()}'\n
      
      The Mixed Language with Highest Movies Count on 'Netflix' is '{netflix_mixed_languages_high_movies['Mixed Language'][0]}' : '{netflix_mixed_languages_high_movies['Netflix'].max()}'\n
      The Mixed Language with Lowest Movies Count on 'Netflix' is '{netflix_mixed_languages_low_movies['Mixed Language'][0]}' : '{netflix_mixed_languages_low_movies['Netflix'].min()}'\n
      
      The Mixed Language with Highest Movies Count on 'Hulu' is '{hulu_mixed_languages_high_movies['Mixed Language'][0]}' : '{hulu_mixed_languages_high_movies['Hulu'].max()}'\n
      The Mixed Language with Lowest Movies Count on 'Hulu' is '{hulu_mixed_languages_low_movies['Mixed Language'][0]}' : '{hulu_mixed_languages_low_movies['Hulu'].min()}'\n
      
      The Mixed Language with Highest Movies Count on 'Prime Video' is '{prime_video_mixed_languages_high_movies['Mixed Language'][0]}' : '{prime_video_mixed_languages_high_movies['Prime Video'].max()}'\n
      The Mixed Language with Lowest Movies Count on 'Prime Video' is '{prime_video_mixed_languages_low_movies['Mixed Language'][0]}' : '{prime_video_mixed_languages_low_movies['Prime Video'].min()}'\n
      
      The Mixed Language with Highest Movies Count on 'Disney+' is '{disney_mixed_languages_high_movies['Mixed Language'][0]}' : '{disney_mixed_languages_high_movies['Disney+'].max()}'\n
      The Mixed Language with Lowest Movies Count on 'Disney+' is '{disney_mixed_languages_low_movies['Mixed Language'][0]}' : '{disney_mixed_languages_low_movies['Disney+'].min()}'\n 
      ''')
      The Mixed Language with Highest Movies Count Ever Got is 'English,Spanish' : '306'

      The Mixed Language with Lowest Movies Count Ever Got is 'Italian,Hungarian,Latin' : '1'

      
      The Mixed Language with Highest Movies Count on 'Netflix' is 'English,Spanish' : '68'

      The Mixed Language with Lowest Movies Count on 'Netflix' is 'English,Polish,Cantonese,Italian' : '0'

      
      The Mixed Language with Highest Movies Count on 'Hulu' is 'English,Spanish' : '39'

      The Mixed Language with Lowest Movies Count on 'Hulu' is 'English,Polish,Cantonese,Italian' : '0'

      
      The Mixed Language with Highest Movies Count on 'Prime Video' is 'English,Spanish' : '208'

      The Mixed Language with Lowest Movies Count on 'Prime Video' is 'Yoruba,Ibo,English' : '0'

      
      The Mixed Language with Highest Movies Count on 'Disney+' is 'English,French' : '17'

      The Mixed Language with Lowest Movies Count on 'Disney+' is 'English,Polish,Cantonese,Italian' : '0'
 
      
In [99]:
# Average of the count column for mixed-language entries: first the mean of
# mixed_languages_data_movies['Movies Count'], then the mean of each platform
# frame's own platform column. Every mean is rounded to two decimals.
# Fix: the first message previously misspelled "Across" as "Accross".
print(f'''
      Across All Platforms the Average Movies Count of Mixed Language is '{round(mixed_languages_data_movies['Movies Count'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Mixed Language on 'Netflix' is '{round(netflix_mixed_languages_movies['Netflix'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Mixed Language on 'Hulu' is '{round(hulu_mixed_languages_movies['Hulu'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Mixed Language on 'Prime Video' is '{round(prime_video_mixed_languages_movies['Prime Video'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Mixed Language on 'Disney+' is '{round(disney_mixed_languages_movies['Disney+'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Movies Count of Mixed Language is '2.76'

      The Average Movies Count of Mixed Language on 'Netflix' is '2.06'

      The Average Movies Count of Mixed Language on 'Hulu' is '1.82'

      The Average Movies Count of Mixed Language on 'Prime Video' is '2.57'

      The Average Movies Count of Mixed Language on 'Disney+' is '1.89'
 
      
In [100]:
# Number of distinct 'Mixed Language' values: first in
# mixed_languages_data_movies, then in each platform's own frame.
# `.unique().shape[0]` is the length of the array of distinct values.
# Fix: the first message previously misspelled "Across" as "Accross".
print(f'''
      Across All Platforms Total Count of Mixed Language is '{mixed_languages_data_movies['Mixed Language'].unique().shape[0]}'\n
      Total Count of Mixed Language on 'Netflix' is '{netflix_mixed_languages_movies['Mixed Language'].unique().shape[0]}'\n
      Total Count of Mixed Language on 'Hulu' is '{hulu_mixed_languages_movies['Mixed Language'].unique().shape[0]}'\n
      Total Count of Mixed Language on 'Prime Video' is '{prime_video_mixed_languages_movies['Mixed Language'].unique().shape[0]}'\n
      Total Count of Mixed Language on 'Disney+' is '{disney_mixed_languages_movies['Mixed Language'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Mixed Language is '1091'

      Total Count of Mixed Language on 'Netflix' is '417'

      Total Count of Mixed Language on 'Hulu' is '157'

      Total Count of Mixed Language on 'Prime Video' is '751'

      Total Count of Mixed Language on 'Disney+' is '57'
 
      
In [101]:
# Compare the four platform columns for the first five rows of
# mixed_languages_data_movies on a single set of axes.
plt.figure(figsize = (20, 5))

# Colours are the ones this cell always used. `label=` attaches the platform
# name to each line; without it the four lines could not be told apart because
# the figure had no legend.
platform_line_colors = {
    'Netflix': 'red',
    'Hulu': 'lightgreen',
    'Prime Video': 'lightblue',
    'Disney+': 'darkblue',
}
for platform_name, line_color in platform_line_colors.items():
    sns.lineplot(
        x = mixed_languages_data_movies['Mixed Language'][:5],
        y = mixed_languages_data_movies[platform_name][:5],
        color = line_color,
        label = platform_name,
    )

# seaborn names the y-axis after the last plotted column; override it with a
# label that is valid for all four lines.
plt.xlabel('Mixed Language', fontsize = 15)
plt.ylabel('Movies Count', fontsize = 15)
plt.legend()
plt.show()
In [102]:
# 2x2 grid of bar charts, one panel per platform. Every panel uses the first ten
# rows of mixed_languages_data_movies, with 'Mixed Language' on the y-axis and
# that platform's column on the x-axis.
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (20 , 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
panel_palettes = ['Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r']

# axes.flat walks the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1].
n_l_ax1, h_l_ax2, p_l_ax3, d_l_ax4 = [
    sns.barplot(
        y = mixed_languages_data_movies['Mixed Language'][:10],
        x = mixed_languages_data_movies[platform_column][:10],
        palette = panel_palette,
        ax = panel,
    )
    for platform_column, panel_palette, panel in zip(labels, panel_palettes, axes.flat)
]

# Title each panel with the platform it shows.
for panel_title, drawn_panel in zip(labels, (n_l_ax1, h_l_ax2, p_l_ax3, d_l_ax4)):
    drawn_panel.title.set_text(panel_title)

plt.show()
In [103]:
# 2x2 grid of line plots, one panel per platform, built from the first ten rows
# of mixed_languages_data_movies: platform count on x, 'Mixed Language' on y.
fig, axes = plt.subplots(2, 2, figsize = (20 , 10))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
line_colors = ['red', 'lightgreen', 'lightblue', 'darkblue']

# By default sns.lineplot sorts by x and aggregates all y values that share an
# x value. Here x is the count and y is the language, so languages with equal
# counts would be merged into one averaged point. `estimator=None` and
# `sort=False` draw each of the ten rows as its own point, in data order.
n_ml_ax1, h_ml_ax2, p_ml_ax3, d_ml_ax4 = [
    sns.lineplot(
        y = mixed_languages_data_movies['Mixed Language'][:10],
        x = mixed_languages_data_movies[platform_column][:10],
        color = line_color,
        estimator = None,
        sort = False,
        ax = panel,
    )
    for platform_column, line_color, panel in zip(labels, line_colors, axes.flat)
]

# Title each panel with the platform it shows.
n_ml_ax1.title.set_text(labels[0])
h_ml_ax2.title.set_text(labels[1])
p_ml_ax3.title.set_text(labels[2])
d_ml_ax4.title.set_text(labels[3])

plt.show()
In [104]:
# Defining plot size and title
plt.figure(figsize = (20, 5))
plt.title('Mixed Language  Movies Count Per Platform')

# Overlay one kernel density estimate per platform (these are KDE curves, not
# histograms), each computed from the first 50 values of that platform's own
# count column. `label=` ties each curve to its platform name, so the legend
# stays correct even if one curve is not drawn; previously the names were
# assigned purely by draw order.
sns.kdeplot(netflix_mixed_languages_movies['Netflix'][:50], color = 'red', legend = True, label = 'Netflix')
sns.kdeplot(hulu_mixed_languages_movies['Hulu'][:50], color = 'green', legend = True, label = 'Hulu')
sns.kdeplot(prime_video_mixed_languages_movies['Prime Video'][:50], color = 'lightblue', legend = True, label = 'Prime Video')
sns.kdeplot(disney_mixed_languages_movies['Disney+'][:50], color = 'darkblue', legend = True, label = 'Disney+')

# Build the legend from the labels attached to the curves above
plt.legend()
plt.show()
In [105]:
# 2x2 grid of bar charts, one panel per platform. Unlike a shared-frame view,
# each panel reads the first ten rows of that platform's own frame, with
# 'Mixed Language' on the y-axis and the platform's column on the x-axis.
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (20 , 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

# (frame, palette) per platform, listed in the same order as `labels`.
platform_panels = [
    (netflix_mixed_languages_movies, 'Reds_r'),
    (hulu_mixed_languages_movies, 'Greens_r'),
    (prime_video_mixed_languages_movies, 'Blues_r'),
    (disney_mixed_languages_movies, 'BuPu_r'),
]

drawn_axes = []
# axes.ravel() flattens the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1].
for (platform_frame, bar_palette), platform_column, target_ax in zip(platform_panels, labels, axes.ravel()):
    drawn_ax = sns.barplot(
        y = platform_frame['Mixed Language'][:10],
        x = platform_frame[platform_column][:10],
        palette = bar_palette,
        ax = target_ax,
    )
    drawn_ax.title.set_text(platform_column)
    drawn_axes.append(drawn_ax)

# Keep the per-panel names the cell has always exposed.
n_ml_ax1, h_ml_ax2, p_ml_ax3, d_ml_ax4 = drawn_axes

plt.show()
In [106]:
# Funnel chart built from the first ten rows of mixed_languages_data_movies:
# one stage per 'Mixed Language' entry, sized by its 'Movies Count'.
funnel_stage_names = mixed_languages_data_movies['Mixed Language'][:10]
funnel_stage_sizes = mixed_languages_data_movies['Movies Count'][:10]

funnel_trace = go.Funnel(x = funnel_stage_sizes, y = funnel_stage_names)
fig = go.Figure(data = funnel_trace)
fig.show()